# ============================== SETUP ===============================
# Remove every object from the current workspace before the run starts.
# NOTE(review): when the script is source()d from an interactive session this
# also deletes the caller's own objects.
rm(list = ls())
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)
# Every relative path used below ("../datasets/...", "../plots/...") is
# resolved from this directory.
# NOTE(review): user-specific path; it must be adjusted on another machine.
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(tidyverse)
library(data.table)
library(dplyr)
library(ggplot2)
library(zoo)
library(fixest)
library(knitr)
library(texreg)

# ========================= DATA WRANGLING ==========================
# Inclusive date window applied to both platforms.
window_start <- as.Date("2016-08-15")
window_end   <- as.Date("2021-05-15")

# Read one platform's post file and prepare it for analysis:
#   1. keep rows with RelevantLogit == 1,
#   2. of those, keep rows with ads_dict == 0,
#   3. derive `date`, `month` (yearmon) and `year` from `created_at`,
#   4. keep rows whose date lies inside the window above.
load_posts <- function(path) {
  posts <- fread(path, stringsAsFactors = FALSE)
  posts <- posts[posts$RelevantLogit == 1, ]
  posts <- posts[posts$ads_dict == 0, ]

  posts$date  <- as.Date(posts$created_at)
  posts$month <- as.yearmon(posts$date)
  posts$year  <- lubridate::year(posts$date)

  in_window <- posts$date >= window_start & posts$date <= window_end
  posts[in_window, ]
}

tweets <- load_posts("../datasets/input/tweets.csv")
gabs   <- load_posts("../datasets/input/gabs.csv")

# ====================== THEME CODING (TWO-KEYWORD) =================
# The five main theme dictionaries and the seven gender sub-dictionaries.
main_theme_cols <- c("race_dict", "gender_dict", "nationalism_dict",
                     "partisan_dict", "religion_dict")
gender_sub_cols <- c("Benevolent_dict", "Feminism_dict",
                     "GenderIdentification_dict", "General_dict",
                     "Hostile_dict", "ReproductiveRights_dict",
                     "SexualOrientation_dict")

# Recode each listed column to 1 when its value is >= 2 and to 0 otherwise
# (missing values stay missing).
flag_two_plus <- function(posts, cols) {
  for (col in cols) {
    posts[[col]] <- ifelse(posts[[col]] >= 2, 1, 0)
  }
  posts
}

tweets <- flag_two_plus(tweets, c(main_theme_cols, gender_sub_cols))
gabs   <- flag_two_plus(gabs,   c(main_theme_cols, gender_sub_cols))

# Number of main themes flagged per post (sum of the five 0/1 indicators).
tweets$n_themes <- Reduce(`+`, lapply(main_theme_cols, function(col) tweets[[col]]))
gabs$n_themes   <- Reduce(`+`, lapply(main_theme_cols, function(col) gabs[[col]]))
table(tweets$n_themes)
table(gabs$n_themes)

# Total engagement per post: reposts + replies + likes, using each platform's
# own field names.
tweets$reaction <- with(tweets, rts + replys + likes)
gabs$reaction   <- with(gabs, reblogs_count + replies_count + favourites_count)

# 0/1 indicator for rows whose seedfollow equals 999.
# NOTE(review): 999 looks like the code that marks seed accounts -- confirm
# against the codebook.
tweets$seed <- as.numeric(tweets$seedfollow == 999)
gabs$seed   <- as.numeric(gabs$seedfollow == 999)

# ========================== SUBCATEGORIES OF GENDER =========================

# Gender sub-dictionaries summarised in this section (column order of the table).
subcat_cols <- c("Benevolent_dict", "Feminism_dict", "GenderIdentification_dict",
                 "General_dict", "Hostile_dict", "ReproductiveRights_dict",
                 "SexualOrientation_dict")

# One-row table: number of flagged posts per sub-dictionary (NAs ignored).
count_subcat_hits <- function(posts) {
  posts %>%
    summarise(across(all_of(subcat_cols), ~ sum(.x, na.rm = TRUE)))
}

# Divide each count by the platform total and put a platform label first.
label_shares <- function(counts, total, label) {
  (counts / total) %>%
    mutate(platform = label) %>%
    relocate(platform)
}

# A) Twitter counts and shares
twitter_subcat_counts <- count_subcat_hits(tweets)
twitter_total_hits    <- rowSums(twitter_subcat_counts)
twitter_shares        <- label_shares(twitter_subcat_counts, twitter_total_hits, "Twitter")

# B) Gab counts and shares
gab_subcat_counts <- count_subcat_hits(gabs)
gab_total_hits    <- rowSums(gab_subcat_counts)
gab_shares        <- label_shares(gab_subcat_counts, gab_total_hits, "Gab")

# C) Stack the two platforms into a 2-row table
subcat_share_table <- bind_rows(twitter_shares, gab_shares)

# Emit the table as LaTeX, rounded to 5 decimals
kable(
  subcat_share_table,
  format = "latex",
  digits = 5,
  booktabs = TRUE,
  caption = "Distribution of subcategory hits among all gender subcategories (sums to 1 per platform)."
)

# =================== POOL PLATFORMS (HARMONIZE FIELDS) ========================
# Twitter column name -> Gab column name, so both platforms share one schema.
twitter_to_gab <- c(
  rts            = "reblogs_count",
  replys         = "replies_count",
  likes          = "favourites_count",
  follower_count = "followers_count"
)

tweets2 <- tweets
#colnames(tweets2)[colnames(tweets2)=="twitter_handle"]  <- "username"
for (old_name in names(twitter_to_gab)) {
  colnames(tweets2)[colnames(tweets2) == old_name] <- twitter_to_gab[[old_name]]
}
tweets2$platform <- "twitter"
gabs$platform    <- "gab"

# Console check: which Twitter columns now also exist in the Gab table.
in_both <- colnames(tweets2) %in% colnames(gabs)
colnames(tweets2)[in_both]

# Columns carried into the pooled data set (same set and order for both).
pooled_cols <- c(
  "created_at", "username", "reblogs_count", "replies_count", "favourites_count",
  "seedfollow", "RelevantLogit", "ads_dict", "ntoken", "gender_dict",
  "nationalism_dict", "partisan_dict", "race_dict", "religion_dict", "n_themes",
  "reaction", "year", "date", "month", "seed", "platform", "followers_count",
  "overlap"
)

# `with = FALSE` makes data.table treat `pooled_cols` as a vector of column names.
gabtweet <- rbind(
  tweets2[, pooled_cols, with = FALSE],
  gabs[, pooled_cols, with = FALSE]
)

# Log outcomes; +1 keeps posts with zero engagement defined.
log_plus_one <- function(x) log(x + 1)
gabtweet$repost_log   <- log_plus_one(gabtweet$reblogs_count)
gabtweet$like_log     <- log_plus_one(gabtweet$favourites_count)
gabtweet$reaction_log <- log_plus_one(gabtweet$reaction)

# ========================= DESCRIPTIVE STATISTICS ==============================
##### Figure 1: Distribution of Followers (Leaders vs Followers)
# One row per username: the largest follower count observed for that user and
# whether any of the user's posts carries seed == 1.
users <- gabtweet %>%
  filter(!is.na(followers_count)) %>%
  group_by(username) %>%
  summarise(
    followers_count_max = max(followers_count, na.rm = TRUE),
    seed_user = as.integer(max(seed, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  filter(is.finite(followers_count_max), followers_count_max >= 0)

# log() is undefined at zero, so only users with at least one follower are plotted.
users_pos <- users %>%
  filter(followers_count_max > 0) %>%
  mutate(
    group = factor(ifelse(seed_user == 1, "Leaders'", "Followers'"),
                   levels = c("Leaders'", "Followers'")),
    count_log = log(followers_count_max)
  )

figure1 <- ggplot(users_pos, aes(x = count_log, linetype = group)) +
  geom_density(adjust = 1.1, linewidth = 1) +
  scale_linetype_manual(values = c("Leaders'" = "solid", "Followers'" = "dashed")) +
  labs(x = "Follower Count (log)", y = "Density") +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom"
  )

# A ggplot object is only drawn when it is printed. Top-level auto-printing does
# not happen under source(), so print explicitly to avoid writing an empty PDF.
pdf("../plots/Figure1.pdf", width = 7, height = 4)
print(figure1)
dev.off()

# Column order of Tables 1 and A8: race, nationalism, gender, partisan, religion.
table_theme_cols <- c("race_dict", "nationalism_dict", "gender_dict",
                      "partisan_dict", "religion_dict")

# 2 x 5 matrix without dimnames: row 1 = number of posts flagged for each
# theme, row 2 = that number divided by the number of rows in `posts`.
theme_count_share <- function(posts) {
  hits <- vapply(
    table_theme_cols,
    function(col) as.numeric(sum(posts[[col]] == 1)),
    numeric(1),
    USE.NAMES = FALSE
  )
  out <- matrix(nrow = 2, ncol = length(hits))
  out[1, ] <- hits
  out[2, ] <- hits / nrow(posts)
  out
}

##### Table 1: Posts Coded According to Each Theme
themes <- theme_count_share(gabtweet)
stargazer::stargazer(themes, summary = FALSE)

##### Table A8: Leaders’ Posts Coded According to Each Theme
gabtweetleaders <- gabtweet[gabtweet$seed == 1, ]
themes <- theme_count_share(gabtweetleaders)
stargazer::stargazer(themes, summary = FALSE)

# Row/column order shared by Tables 2 and A9.
cor_theme_cols   <- c("race_dict", "nationalism_dict", "gender_dict",
                      "partisan_dict", "religion_dict")
cor_theme_labels <- c("Race", "Nationalism", "Gender", "Partisan", "Religion")

# Character matrix with a label row and label column. The lower triangle
# (diagonal included) holds pairwise correlations between the theme indicators,
# rounded to 3 decimals except for the first diagonal cell. The upper triangle
# is left as NA.
lower_tri_cor <- function(posts) {
  k <- length(cor_theme_cols)
  out <- matrix(nrow = k + 1, ncol = k + 1)
  out[1, ] <- c(" ", cor_theme_labels)
  out[, 1] <- c(" ", cor_theme_labels)
  for (i in seq_len(k)) {
    row_cor <- vapply(
      seq_len(i),
      function(j) cor(posts[[cor_theme_cols[i]]], posts[[cor_theme_cols[j]]]),
      numeric(1)
    )
    # The first diagonal cell is stored unrounded; every other row is rounded.
    out[i + 1, 2:(i + 1)] <- if (i == 1) row_cor else round(row_cor, 3)
  }
  out
}

##### Table 2: Correlation Coefficients Between Themes
correlation <- lower_tri_cor(gabtweet)
stargazer::stargazer(correlation)

##### Table A9: Correlation Coefficients (Leaders’ Posts)
correlation <- lower_tri_cor(gabtweetleaders)
stargazer::stargazer(correlation)

# =============================== REGRESSIONS ================================
# Token count in units of ten tokens.
gabtweet$ntoken10 <- gabtweet$ntoken / 10

# Platform-specific estimation samples (taken after ntoken10 has been added).
twitter_posts <- gabtweet[gabtweet$platform == "twitter", ]
gab_posts     <- gabtweet[gabtweet$platform == "gab", ]

###### Table 3: Leader Posts Garner Higher Engagement than Non-Leader Posts
# Outcome: log(likes + 1). The pooled model adds a platform fixed effect to the
# date fixed effect used in the single-platform models.
leaderall_l <- feols(like_log ~ seed + ntoken10 + log(followers_count + 1) | date + platform,
                     data = gabtweet)
leadert_l   <- feols(like_log ~ seed + ntoken10 + log(followers_count + 1) | date,
                     data = twitter_posts)
leaderg_l   <- feols(like_log ~ seed + ntoken10 + log(followers_count + 1) | date,
                     data = gab_posts)

# Outcome: log(reposts + 1), same three samples.
leaderall_r <- feols(repost_log ~ seed + ntoken10 + log(followers_count + 1) | date + platform,
                     data = gabtweet)
leadert_r   <- feols(repost_log ~ seed + ntoken10 + log(followers_count + 1) | date,
                     data = twitter_posts)
leaderg_r   <- feols(repost_log ~ seed + ntoken10 + log(followers_count + 1) | date,
                     data = gab_posts)

# Report all six fits with standard errors clustered by username.
leader_fits <- list(leaderall_l, leadert_l, leaderg_l,
                    leaderall_r, leadert_r, leaderg_r)
texreg(
  lapply(leader_fits, function(fit) summary(fit, cluster = ~ username)),
  stars = c(0.05)
)

##### Compare Overlapped Leaders Across Platforms
# Themes in the row order of the comparison table.
overlap_themes <- c(Race = "race_dict", Nationalism = "nationalism_dict",
                    Gender = "gender_dict", Partisan = "partisan_dict",
                    Religion = "religion_dict")

# For the rows with overlap == 1, return the number of rows (`n`) and, per
# theme, the number of rows flagged 1 (`hits`). Both the share table and the
# proportion tests below are built from this single set of numbers, so they
# always agree with each other, including in how missing values are treated
# (they are ignored).
overlap_theme_counts <- function(posts) {
  # which() drops NA comparisons and works for data.table and data.frame alike.
  shared <- posts[which(posts$overlap == 1), ]
  list(
    n = nrow(shared),
    hits = vapply(
      overlap_themes,
      function(col) as.numeric(sum(shared[[col]] == 1, na.rm = TRUE)),
      numeric(1)
    )
  )
}

twitter_overlap <- overlap_theme_counts(tweets)
gab_overlap     <- overlap_theme_counts(gabs)

# Share of overlap posts flagged for each theme, by platform.
leaderT <- unname(twitter_overlap$hits / twitter_overlap$n)
leaderG <- unname(gab_overlap$hits / gab_overlap$n)

compare <- cbind(leaderT, leaderG)
compare <- round(compare, 3)
rownames(compare) <- c("Race","Nationalism","Gender","Partisan","Religion")
stargazer::stargazer(compare, summary = FALSE)

# Two-sample proportion test (Gab vs Twitter) per theme; prints the five
# p-values in the same theme order as the table.
vapply(
  names(overlap_themes),
  function(theme) {
    prop.test(
      x = c(gab_overlap$hits[[theme]], twitter_overlap$hits[[theme]]),
      n = c(gab_overlap$n, twitter_overlap$n)
    )$p.value
  },
  numeric(1),
  USE.NAMES = FALSE
)

# ================================ CO-OCCURRENCE ===============================
# Post-level co-occurrence indicators for the gender and race themes.
gabtweet$genderandrace <- ifelse(gabtweet$gender_dict==1 & gabtweet$race_dict==1, 1, 0)
gabtweet$genderorrace <- ifelse(gabtweet$gender_dict==1 | gabtweet$race_dict==1, 1, 0)

# -- Racialized Gender (followers only) ---------------------------------------
# Monthly totals and proportions among posts with seed == 0.
racializedgender <- gabtweet[gabtweet$seed == 0, ] %>%
  group_by(month) %>%
  summarize(
    genderandrace = sum(genderandrace, na.rm = TRUE),
    gender        = sum(gender_dict,    na.rm = TRUE),
    genderprop    = mean(gender_dict,   na.rm = TRUE),
    raceprop      = mean(race_dict,     na.rm = TRUE),
    .groups = "drop"
  )

# Share of each month's gender posts that are also race posts. A month without
# any gender post gives 0/0 = NaN.
racializedgender$racializedgender <- racializedgender$genderandrace / racializedgender$gender

# Constants of the linear map between the primary and the secondary y axis.
# na.rm = TRUE keeps a single NaN month from turning both maxima, and with them
# the whole rescaled series, into NaN.
rg_axis_shift <- 0.055
rg_share_max  <- max(racializedgender$racializedgender, na.rm = TRUE)
rg_prop_max   <- max(c(racializedgender$raceprop, racializedgender$genderprop), na.rm = TRUE)

# transform for secondary axis
racializedgender$transformed_racializedgender <-
  racializedgender$racializedgender / rg_share_max * rg_prop_max - rg_axis_shift

# The y-axis titles are set through scale_y_continuous(); a scale's `name`
# takes precedence over labs(y = ...), so no y label is given in labs().
figure7a <- ggplot(racializedgender) +
  geom_line(aes(x = month, y = raceprop,                   color = "Race",              linetype = "Race"),              linewidth = 1.3) +
  geom_line(aes(x = month, y = genderprop,                 color = "Gender",            linetype = "Gender"),            linewidth = 1.3) +
  geom_line(aes(x = month, y = transformed_racializedgender, color = "Racialized Gender", linetype = "Racialized Gender"), linewidth = 1.3) +
  labs(x = "", color = "Themes") +
  scale_color_manual(
    values = c("Race" = "chocolate4", "Gender" = "deeppink4", "Racialized Gender" = "black"),
    name   = "Themes",
    breaks = c("Race", "Gender", "Racialized Gender")
  ) +
  scale_linetype_manual(
    values = c("Race" = "dashed", "Gender" = "dashed", "Racialized Gender" = "solid"),
    name   = "Themes",
    breaks = c("Race", "Gender", "Racialized Gender")
  ) +
  scale_y_continuous(
    name = "Proportion of Gendered or Racial Rhetoric",
    # Inverse of the transform above, so the right axis reads in original units.
    sec.axis = sec_axis(
      ~ (. + rg_axis_shift) * (rg_share_max / rg_prop_max),
      name = "Proportion of the Gender Theme That Is Racialized"
    )
  ) +
  theme_minimal() +
  theme(
    axis.title.y   = element_text(size = 15),
    axis.title.x   = element_text(size = 15),
    plot.title     = element_text(hjust = 0.5),
    legend.position = "bottom"
  )

# Print explicitly: under source() a bare ggplot expression is not drawn and the
# PDF would be written empty.
pdf("../plots/Figure7a.pdf")
print(figure7a)
dev.off()

# -- Nationalized Partisan (followers only) -----------------------------------
# Post-level indicator: both the nationalism and the partisan theme are flagged.
gabtweet$nationandparty <- ifelse(gabtweet$nationalism_dict == 1 & gabtweet$partisan_dict == 1, 1, 0)

# Monthly totals and proportions among posts with seed == 0.
nationparty <- gabtweet[gabtweet$seed == 0, ] %>%
  group_by(month) %>%
  summarize(
    nationandparty  = sum(nationandparty,   na.rm = TRUE),
    partisan        = sum(partisan_dict,    na.rm = TRUE),
    nationalismprop = mean(nationalism_dict, na.rm = TRUE),
    partisanprop    = mean(partisan_dict,    na.rm = TRUE),
    .groups = "drop"
  )

# Share of each month's partisan posts that are also nationalism posts. A month
# without any partisan post gives 0/0 = NaN.
nationparty$nationalizedparty <- nationparty$nationandparty / nationparty$partisan

# Constants of the linear map between the primary and the secondary y axis.
# na.rm = TRUE keeps a single NaN month from turning both maxima, and with them
# the whole rescaled series, into NaN.
np_axis_shift <- 0.055
np_share_max  <- max(nationparty$nationalizedparty, na.rm = TRUE)
np_prop_max   <- max(c(nationparty$nationalismprop, nationparty$partisanprop), na.rm = TRUE)

# transform for secondary axis
nationparty$transformed_nationalizedparty <-
  nationparty$nationalizedparty / np_share_max * np_prop_max - np_axis_shift

# The y-axis titles are set through scale_y_continuous(); a scale's `name`
# takes precedence over labs(y = ...), so no y label is given in labs().
figure7b <- ggplot(nationparty) +
  geom_line(aes(x = month, y = nationalismprop,            color = "Nationalism",         linetype = "Nationalism"),         linewidth = 1.3) +
  geom_line(aes(x = month, y = partisanprop,               color = "Partisan",            linetype = "Partisan"),            linewidth = 1.3) +
  geom_line(aes(x = month, y = transformed_nationalizedparty, color = "Nationalized Partisan", linetype = "Nationalized Partisan"), linewidth = 1.3) +
  labs(x = "", color = "Themes") +
  scale_color_manual(
    values = c("Nationalism" = "chocolate4", "Partisan" = "deeppink4", "Nationalized Partisan" = "black"),
    name   = "Themes",
    breaks = c("Nationalism", "Partisan", "Nationalized Partisan")
  ) +
  scale_linetype_manual(
    values = c("Nationalism" = "dashed", "Partisan" = "dashed", "Nationalized Partisan" = "solid"),
    name   = "Themes",
    breaks = c("Nationalism", "Partisan", "Nationalized Partisan")
  ) +
  scale_y_continuous(
    name = "Proportion of Nationalist or Partisan Rhetoric",
    # Inverse of the transform above, so the right axis reads in original units.
    sec.axis = sec_axis(
      ~ (. + np_axis_shift) * (np_share_max / np_prop_max),
      name = "Proportion of the Partisan Theme That Is Nationalized"
    )
  ) +
  theme_minimal() +
  theme(
    axis.title.y   = element_text(size = 15),
    axis.title.x   = element_text(size = 15),
    plot.title     = element_text(hjust = 0.5),
    legend.position = "bottom"
  )

# Print explicitly: under source() a bare ggplot expression is not drawn and the
# PDF would be written empty.
pdf("../plots/Figure7b.pdf")
print(figure7b)
dev.off()

# ================================ TIME TRENDS ===============================
# Monthly share of posts flagged for each theme, separately for seed == 1
# (labelled "Leaders" in the figures) and seed == 0 ("Followers").
aggmonth <- gabtweet %>%
  group_by(month, seed) %>%
  summarize(Race = mean(race_dict, na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Gender = mean(gender_dict, na.rm = TRUE),
            Partisan = mean(partisan_dict, na.rm = TRUE),
            Religion = mean(religion_dict, na.rm = TRUE),
            n = n(),
            .groups = "drop")

aggmonth$month <- as.yearmon(aggmonth$month)

# Draw one page of the time-trend PDF.
#   trend    : monthly table with columns `month`, `seed` and one column per theme
#   theme    : name of the column in `trend` to plot
#   ylab     : y-axis label
#   ylim     : y-axis limits
#   legend_y : y position of the legend (x is fixed at October 2020)
#   events   : table with `month` (parseable by as.yearmon), `y` (label height)
#              and `label`; each row adds a dotted vertical line plus its text
plot_theme_trend <- function(trend, theme, ylab, ylim, legend_y, events) {
  leader_lty   <- 1
  follower_lty <- 4
  trend_lwd    <- 3
  is_leader    <- trend$seed == 1
  is_follower  <- trend$seed == 0

  plot(trend$month[is_leader], trend[[theme]][is_leader],
       col = "blue", type = "l", lty = leader_lty, lwd = trend_lwd,
       ylab = ylab, xlab = "Time", ylim = ylim)
  lines(trend$month[is_follower], trend[[theme]][is_follower],
        col = "lightblue", type = "l", lty = follower_lty, lwd = trend_lwd)
  # The legend reuses the line type and width of the drawn series so that the
  # key matches what is on the page.
  legend(as.yearmon("October 2020"), legend_y, legend = c("Leaders", "Followers"),
         col = c("blue", "lightblue"), lty = c(leader_lty, follower_lty),
         lwd = c(trend_lwd, trend_lwd), cex = 0.6)

  for (i in seq_len(nrow(events))) {
    when <- as.yearmon(events$month[i])
    abline(v = when, lty = 3, col = "gray")
    text(x = when, y = events$y[i], labels = events$label[i], cex = 0.6)
  }
  invisible(NULL)
}

# NOTE(review): the "Unite the Right Rally" marker sits at August 2018; the
# original Charlottesville rally was in August 2017 -- confirm which is meant.
race_events <- tibble::tribble(
  ~month,           ~y,   ~label,
  "November 2016",  0.39, "US\nPresidential\nElection",
  "February 2017",  0.32, "Trump\nMuslim Ban\nChallenged\nin Courts",
  "August 2018",    0.42, "Unite\nthe Right\nRally",
  "November 2018",  0.19, "US\nMidterm\nElections",
  "September 2019", 0.19, "Impeachment\nHearings\nStart in\nUS House",
  "January 2020",   0.42, "Impeachment\nTrial\nin Senate\nFails",
  "April 2020",     0.18, "Covid\nLockdown",
  "June 2020",      0.4,  "George\nFloyd\nBLM\nProtests",
  "November 2020",  0.35, "US\nPresidential\nElection",
  "January 2021",   0.17, "January 6\nInsurrection"
)

nationalism_events <- tibble::tribble(
  ~month,     ~y,    ~label,
  "Jan 2017", 0.11,  "Withdrawal from\nthe TPP\n \nMuslim Travel Ban",
  "Jun 2017", 0.075, "Withdrawal from\nParis\nClimate Accord",
  "Sep 2017", 0.12,  "Trump Speaks\nat UN",
  "Jan 2018", 0.028, "Trade War\nwith\nChina",
  "May 2018", 0.12,  "Withdrawal\nfrom\nIran Deal",
  "Jun 2018", 0.05,  "Child Separation\nPolicy at\nMexico Border",
  "Jun 2019", 0.12,  "Trump\nMeets with\nKim Jong Un",
  "Jan 2020", 0.04,  "NAFTA\nReplaced with\nUSMCA",
  "Mar 2020", 0.12,  "Pandemic\nDeclared\nNational\nEmergency"
)

gender_events <- tibble::tribble(
  ~month,     ~y,    ~label,
  "Oct 2016", 0.085, "Access\nHollywood\nTape",
  "Jan 2017", 0.02,  "Women's March",
  "Jul 2017", 0.076, "Trump Announces\nBan on\nTransgender\nService Members\nin Military",
  "Sep 2017", 0.023, "Betsy DeVos\nRescinds\nTitle IX\nGuidance",
  "Feb 2018", 0.092, "Stormy Daniels\nHush Money\nScandal",
  "Jun 2018", 0.024, "Supreme Court\nRuling That\nBusiness Can\nRefuse Services\nto Gay Couples",
  "Sep 2018", 0.08,  "Christine\nBlasey Ford\nTestifies\nRegarding\nAssault Allegations\nAgainst\nBrett Kavanaugh",
  "Jan 2020", 0.08,  "Trump Attends\nMarch for Life",
  "Sep 2020", 0.02,  "Ruth Bader Ginsburg\nPasses Away"
)

partisan_events <- tibble::tribble(
  ~month,     ~y,    ~label,
  "Nov 2016", 0.46,  "US\nPresidential\nElection",
  "May 2017", 0.4,   "Robert Mueller\nAppointed\nSpecial Counsel",
  "Feb 2018", 0.26,  "Indictments on\nRussian Election\nInterference",
  "Nov 2018", 0.4,   "Midterm Elections",
  "Dec 2019", 0.4,   "Trump Impeached",
  "Nov 2020", 0.376, "US\nPresidential\nElection",
  "Jan 2021", 0.31,  "January 6\nInsurrection"
)

religion_events <- tibble::tribble(
  ~month,     ~y,    ~label,
  "Dec 2016", 0.005, "Christmas",
  "Jan 2017", 0.1,   "Muslim\nTravel\nBan",
  "May 2017", 0.03,  "Trump Signs\nExecutive Order\nLimiting the\nJohnson Amendment",
  "Dec 2017", 0.005, "Christmas",
  "Jun 2018", 0.085, "Supreme Court\nRuling That\nBusiness Can\nRefuse Services\nto Gay Couples",
  "Dec 2018", 0.005, "Christmas",
  "Dec 2019", 0.005, "Christmas",
  "Jun 2020", 0.06,  "Trump Holds\nBible Up at\nSt Johns Church",
  "Dec 2020", 0.005, "Christmas"
)

# One page per theme, in the order Race, Nationalism, Gender, Partisan, Religion.
pdf("../plots/FigureA4A5.pdf", width = 7, height = 4)

# Race
plot_theme_trend(aggmonth, "Race", ylab = "Racial Rhetoric",
                 ylim = c(0.15, 0.45), legend_y = 0.44, events = race_events)

# Nationalism
plot_theme_trend(aggmonth, "Nationalism", ylab = "Nationalist Rhetoric",
                 ylim = c(0.02, 0.138), legend_y = 0.136, events = nationalism_events)

# Gender
plot_theme_trend(aggmonth, "Gender", ylab = "Gender Rhetoric",
                 ylim = c(0.01, 0.1), legend_y = 0.095, events = gender_events)

# Partisan
plot_theme_trend(aggmonth, "Partisan", ylab = "Partisan Rhetoric",
                 ylim = c(0.08, 0.5), legend_y = 0.49, events = partisan_events)

# Religion
plot_theme_trend(aggmonth, "Religion", ylab = "Religious Rhetoric",
                 ylim = c(0, 0.12), legend_y = 0.11, events = religion_events)
dev.off()

##### Figure A2: Number of Posts on Twitter and Gab Over Time
# Posts per month on each platform.
gabs_agg1 <- gabs %>% group_by(month) %>% summarize(n = n())
tweets_agg1 <- tweets %>% group_by(month) %>% summarize(n = n())

##### Figure A3: Number of Posts from Leaders on Twitter and Gab Over Time
# Same counts restricted to posts with seed == 1.
gabs_agg2 <- gabs[gabs$seed==1,] %>% group_by(month) %>% summarize(n = n())
tweets_agg2 <- tweets[tweets$seed==1,] %>% group_by(month) %>% summarize(n = n())

# Draw one page: monthly Gab counts (green) against monthly Twitter counts (blue).
#   gab_counts, tweet_counts : tables with columns `month` (yearmon) and `n`
#   ylab, main               : y-axis label and plot title
#   ylim                     : y-axis limits
#   legend_y                 : y position of the legend (x is fixed at May 2020)
plot_post_counts <- function(gab_counts, tweet_counts, ylab, main, ylim, legend_y) {
  shown <- gab_counts$month <= as.yearmon("June 2021")
  plot(gab_counts$month[shown], gab_counts$n[shown],
       col = "green", type = "l", lty = 1, lwd = 3,
       ylab = ylab, xlab = "Time", main = main,
       xlim = c(as.yearmon("Jan 2017"), as.yearmon("February 2021")), ylim = ylim)
  lines(tweet_counts$month, tweet_counts$n, col = "blue", lty = 1, lwd = 3)
  legend(as.yearmon("May 2020"), legend_y, legend = c("Gab", "Twitter"),
         col = c("green", "blue"), lty = c(1, 1), lwd = c(3, 3), cex = 0.8)
  invisible(NULL)
}

pdf("../plots/FigureA2A3.pdf", width = 7, height = 4)
plot_post_counts(gabs_agg1, tweets_agg1,
                 ylab = "Number of Posts",
                 main = "Number of Posts on Gab versus Twitter",
                 ylim = c(0, 320000), legend_y = 300000)

plot_post_counts(gabs_agg2, tweets_agg2,
                 ylab = "Number of Leaders' Posts",
                 main = "Number of Posts from Leaders on Gab versus Twitter",
                 ylim = c(0, 8700), legend_y = 7800)
dev.off()

